In [1]:
    
%matplotlib inline
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn import model_selection  # grid_search and cross_validation were merged into model_selection
import joblib  # sklearn.externals.joblib is deprecated; import joblib directly
import xgboost as xgb
import matplotlib.pyplot as plt
import seaborn as sns
import operator
import itertools
import random
import os
import pickle
import time
    
In [2]:
    
DATA_DIRECTORY = "E:\\eaglesense\\data\\topviewkinect"
PREPROCESSED_DIRECTORY = DATA_DIRECTORY + "\\all"
FEATURE_SET = "eval-chi2"
    
In [3]:
    
if not os.path.exists("results"):
    os.makedirs("results")
    
In [4]:
    
features_csv = "{root}/{tag}_features.csv".format(root=PREPROCESSED_DIRECTORY, tag=FEATURE_SET)
features_df = pd.read_csv(features_csv)
    
In [5]:
    
features_df.head()
    
    Out[5]:
In [6]:
    
labels_csv = "{root}/{tag}_labels.csv".format(root=PREPROCESSED_DIRECTORY, tag=FEATURE_SET)
labels_df = pd.read_csv(labels_csv)
    
In [7]:
    
s1_data_path = "{root}/{tag}_s1_data.pickle".format(root=PREPROCESSED_DIRECTORY, tag=FEATURE_SET)
s2_data_path = "{root}/{tag}_s2_data.pickle".format(root=PREPROCESSED_DIRECTORY, tag=FEATURE_SET)
cs_data_path = "{root}/{tag}_cs_data.pickle".format(root=PREPROCESSED_DIRECTORY, tag=FEATURE_SET)
noinfrared_data_path = "{root}/{tag}_cs_noinfrared_data.pickle".format(root=PREPROCESSED_DIRECTORY, tag=FEATURE_SET)
with open(s1_data_path, "rb") as f:
    s1_data = pickle.load(f)
    
with open(s2_data_path, "rb") as f:
    s2_data = pickle.load(f)
with open(cs_data_path, "rb") as f:
    cs_data = pickle.load(f)
with open(noinfrared_data_path, "rb") as f:
    noinfrared_data = pickle.load(f)
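    
A quick shape check on the loaded splits; this is an added sketch, assuming each pickle holds the four arrays ("X_train", "y_train", "X_test", "y_test") used in the cells below.
    
In [ ]:
    
# Print the array shapes of each preprocessed split.
for split_name, split_data in [("s1", s1_data), ("s2", s2_data), ("cs", cs_data), ("noinfrared", noinfrared_data)]:
    print(split_name, {key: value.shape for key, value in split_data.items()})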
    
In [8]:
    
unique_subjects = features_df["subject"].unique()
unique_subjects
    
    Out[8]:
In [9]:
    
ACTIVITIES = ["Standing", "Sitting", "Pointing", "Phone", "Tablet", "Paper"]
    
In [10]:
    
num_activities = len(ACTIVITIES)
num_activities
    
    Out[10]:
In [11]:
    
XGB_PARAM_FINAL = {}
XGB_PARAM_FINAL["eta"] = 0.3
XGB_PARAM_FINAL["gamma"] = 1
XGB_PARAM_FINAL["lambda"] = 1
XGB_PARAM_FINAL["alpha"] = 0
XGB_PARAM_FINAL["max_depth"] = 6
XGB_PARAM_FINAL["colsample_bytree"] = 0.5
XGB_PARAM_FINAL["colsample_bylevel"] = 0.5
XGB_PARAM_FINAL["subsample"] = 0.5
XGB_PARAM_FINAL["objective"] = "multi:softmax"
XGB_PARAM_FINAL["eval_metric"] = "merror"
XGB_PARAM_FINAL["num_class"] = len(ACTIVITIES)
XGB_PARAM_FINAL["silent"] = 0
XGB_NUM_ROUNDS = 200
XGB_EARLYSTOPPING_ROUNDS = 30
    
In [12]:
    
def crosssubject_test_split(features_df, labels_df, training_subjects_ids):
    num_features = features_df.shape[1] - 1
    
    X_train = np.array([], dtype=np.float64).reshape(0, num_features)
    y_train = np.array([], dtype=np.int32).reshape(0, 1)
    X_test = np.array([], dtype=np.float64).reshape(0, num_features)
    y_test = np.array([], dtype=np.int32).reshape(0, 1)
    for subject_id in features_df["subject"].unique():
        subject_features = features_df[features_df["subject"] == subject_id]
        subject_features = subject_features.drop(["subject"], axis=1)
        subject_labels = labels_df[labels_df["subject"] == subject_id]
        subject_labels = subject_labels[["activity"]]
        subject_X = subject_features.values
        subject_y = subject_labels.values
        if subject_id in training_subjects_ids:
            X_train = np.vstack([X_train, subject_X])
            y_train = np.vstack([y_train, subject_y])
        else:
            X_test = np.vstack([X_test, subject_X])
            y_test = np.vstack([y_test, subject_y])
    
    return X_train, y_train, X_test, y_test
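    
A quick usage sketch of this helper (hypothetical split, not part of the original evaluation): train on the first half of the subject IDs and test on the rest.
    
In [ ]:
    
# Hypothetical example split: first half of the subjects for training, the remaining subjects for testing.
example_train_ids = unique_subjects[:len(unique_subjects) // 2]
example_X_train, example_y_train, example_X_test, example_y_test = crosssubject_test_split(
    features_df, labels_df, example_train_ids)
example_X_train.shape, example_X_test.shape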
    
In [13]:
    
def get_normalized_confusion_matrix(y_true, y_predicted):
    confusion_matrix = metrics.confusion_matrix(y_true, y_predicted)
    confusion_matrix_normalized = confusion_matrix.astype("float") / confusion_matrix.sum(axis=1)[:, np.newaxis]
    confusion_matrix_normalized *= 100
    return confusion_matrix_normalized
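    
Since matplotlib and seaborn are already imported, a matrix returned by this helper can be plotted directly. A minimal sketch (added here for illustration; it assumes the class order matches ACTIVITIES):
    
In [ ]:
    
def plot_confusion_matrix(confusion_matrix_normalized, labels=ACTIVITIES):
    # Heatmap of per-class percentages; rows are true activities, columns are predictions.
    fig, ax = plt.subplots(figsize=(7, 6))
    sns.heatmap(confusion_matrix_normalized, annot=True, fmt=".1f", cmap="Blues",
                xticklabels=labels, yticklabels=labels, ax=ax)
    ax.set_xlabel("Predicted activity")
    ax.set_ylabel("True activity")
    plt.show()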
    
In [14]:
    
s1_X_train = s1_data["X_train"]
s1_y_train = s1_data["y_train"]
s1_X_test = s1_data["X_test"]
s1_y_test = s1_data["y_test"]
    
In [15]:
    
s1_X_train.shape
    
    Out[15]:
In [16]:
    
s1_X_test.shape
    
    Out[16]:
In [17]:
    
s1_train_xgbmatrix = xgb.DMatrix(s1_X_train, s1_y_train)
s1_test_xgbmatrix = xgb.DMatrix(s1_X_test, s1_y_test)
s1_watchlist = [(s1_train_xgbmatrix, "train"), (s1_test_xgbmatrix, "eval")]
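    
Optionally, the fixed hyperparameters in XGB_PARAM_FINAL could be sanity-checked with k-fold cross-validation on the training matrix before the hold-out run; a minimal sketch, not part of the original notebook (5 folds chosen arbitrarily).
    
In [ ]:
    
# Sketch: 5-fold CV with early stopping to double-check the chosen number of boosting rounds.
# cv_history = xgb.cv(params=XGB_PARAM_FINAL, dtrain=s1_train_xgbmatrix,
#                     num_boost_round=XGB_NUM_ROUNDS, nfold=5,
#                     early_stopping_rounds=XGB_EARLYSTOPPING_ROUNDS, seed=42)
# cv_history.tail()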
    
In [18]:
    
s1_eval_results = {}
# Validate with early stopping on the held-out evaluation set to select the number of boosting rounds.
s1_validation = xgb.train(params=XGB_PARAM_FINAL, dtrain=s1_train_xgbmatrix, evals=s1_watchlist, evals_result=s1_eval_results,
                          num_boost_round=XGB_NUM_ROUNDS, early_stopping_rounds=XGB_EARLYSTOPPING_ROUNDS, verbose_eval=100)
    
    
In [19]:
    
# Retrain for the number of rounds chosen by early stopping (the same pattern is repeated for the other splits below).
s1_booster = xgb.train(params=XGB_PARAM_FINAL, dtrain=s1_train_xgbmatrix, num_boost_round=s1_validation.best_iteration+1)
    
In [20]:
    
s1_y_predicted = s1_booster.predict(s1_test_xgbmatrix)
    
In [21]:
    
s1_accuracy = metrics.accuracy_score(s1_y_test, s1_y_predicted)
s1_accuracy
    
    Out[21]:
In [22]:
    
s1_confusion_matrix = get_normalized_confusion_matrix(s1_y_test, s1_y_predicted)
    
In [23]:
    
s1_results_dump = {
    "eval_results": s1_eval_results,
    "eval_earlystoppping_best_iteration": s1_validation.best_iteration+1,
    "eval_earlystoppping_best_score": s1_validation.best_score,
    "classifier": s1_booster,
    "final_accuracy": s1_accuracy,
    "final_confusion_matrix": s1_confusion_matrix
}
with open("results/s1.pickle", "wb") as f:
    pickle.dump(s1_results_dump, f)
    
In [24]:
    
s2_X_train = s2_data["X_train"]
s2_y_train = s2_data["y_train"]
s2_X_test = s2_data["X_test"]
s2_y_test = s2_data["y_test"]
    
In [25]:
    
s2_X_train.shape
    
    Out[25]:
In [26]:
    
s2_X_test.shape
    
    Out[26]:
In [27]:
    
s2_train_xgbmatrix = xgb.DMatrix(s2_X_train, s2_y_train)
s2_test_xgbmatrix = xgb.DMatrix(s2_X_test, s2_y_test)
s2_watchlist = [(s2_train_xgbmatrix, "train"), (s2_test_xgbmatrix, "eval")]
    
In [28]:
    
s2_eval_results = {}
s2_validation = xgb.train(params=XGB_PARAM_FINAL, dtrain=s2_train_xgbmatrix, evals=s2_watchlist, evals_result=s2_eval_results, 
                          num_boost_round=XGB_NUM_ROUNDS, early_stopping_rounds=XGB_EARLYSTOPPING_ROUNDS, verbose_eval=100)
    
    
In [29]:
    
s2_booster = xgb.train(params=XGB_PARAM_FINAL, dtrain=s2_train_xgbmatrix, num_boost_round=s2_validation.best_iteration+1)
    
In [30]:
    
s2_y_predicted = s2_booster.predict(s2_test_xgbmatrix)
    
In [31]:
    
s2_accuracy = metrics.accuracy_score(s2_y_test, s2_y_predicted)
s2_accuracy
    
    Out[31]:
In [32]:
    
s2_confusion_matrix = get_normalized_confusion_matrix(s2_y_test, s2_y_predicted)
    
In [33]:
    
s2_results_dump = {
    "eval_results": s2_eval_results,
    "eval_earlystoppping_best_iteration": s2_validation.best_iteration+1,
    "eval_earlystoppping_best_score": s2_validation.best_score,
    "classifier": s2_booster,
    "final_accuracy": s2_accuracy,
    "final_confusion_matrix": s2_confusion_matrix
}
with open("results/s2.pickle", "wb") as f:
    pickle.dump(s2_results_dump, f)
    
In [14]:
    
cs_X_train = cs_data["X_train"]
cs_y_train = cs_data["y_train"]
cs_X_test = cs_data["X_test"]
cs_y_test = cs_data["y_test"]
    
In [16]:
    
cs_X_train.shape
    
    Out[16]:
In [17]:
    
cs_X_test.shape
    
    Out[17]:
In [37]:
    
from sklearn import ensemble
    
In [38]:
    
rf_clf = ensemble.RandomForestClassifier(n_estimators=100, criterion="entropy", max_depth=None, max_features="sqrt", 
                                         random_state=42, n_jobs=-1)
    
In [39]:
    
rf_training_start = time.time()
rf_clf.fit(cs_X_train, cs_y_train.ravel())
rf_training_time = (time.time() - rf_training_start)
rf_training_time
    
    Out[39]:
In [40]:
    
rf_testing_start = time.time()
rf_y_predicted = rf_clf.predict(cs_X_test)
rf_testing_time = (time.time() - rf_testing_start)
rf_testing_time
    
    Out[40]:
In [41]:
    
rf_y_train_predicted = rf_clf.predict(cs_X_train)
rf_train_accuracy = metrics.accuracy_score(cs_y_train, rf_y_train_predicted)
rf_train_accuracy
    
    Out[41]:
In [42]:
    
rf_accuracy = metrics.accuracy_score(cs_y_test, rf_y_predicted)
rf_accuracy
    
    Out[42]:
In [43]:
    
rf_confusion_matrix = get_normalized_confusion_matrix(cs_y_test, rf_y_predicted)
    
In [44]:
    
rf_results_dump = {
    "training_time": rf_training_time,
    "testing_time": rf_testing_time,
    "training_accuracy": rf_train_accuracy,
    "final_accuracy": rf_accuracy,
    "final_confusion_matrix": rf_confusion_matrix
}
with open("results/cs_rf.pickle", "wb") as f:
    pickle.dump(rf_results_dump, f)
    
In [53]:
    
cs_X_train.shape
    
    Out[53]:
In [54]:
    
cs_X_test.shape
    
    Out[54]:
In [20]:
    
cs_train_xgbmatrix = xgb.DMatrix(cs_X_train, cs_y_train)
cs_test_xgbmatrix = xgb.DMatrix(cs_X_test, cs_y_test)
cs_watchlist = [(cs_train_xgbmatrix, "train"), (cs_test_xgbmatrix, "eval")]
    
In [21]:
    
cs_eval_results = {}
cs_validation = xgb.train(params=XGB_PARAM_FINAL, dtrain=cs_train_xgbmatrix, evals=cs_watchlist, evals_result=cs_eval_results,
                          num_boost_round=XGB_NUM_ROUNDS, early_stopping_rounds=XGB_EARLYSTOPPING_ROUNDS, verbose_eval=100)
    
    
In [22]:
    
xgboost_training_start = time.time()
cs_booster = xgb.train(params=XGB_PARAM_FINAL, dtrain=cs_train_xgbmatrix, num_boost_round=cs_validation.best_iteration+1)
xgboost_training_time = (time.time() - xgboost_training_start)
xgboost_training_time
    
    Out[22]:
In [55]:
    
total_time = list()
    
In [ ]:
    
# Per-sample prediction latency over the training-set samples
for i in range(cs_X_train.shape[0]):
    x = cs_X_train[i, :].reshape((1, -1))
    x_dmatrix = xgb.DMatrix(x)
    start = time.time()
    cs_booster.predict(x_dmatrix)
    total_time.append(time.time() - start)
# Per-sample prediction latency over the test-set samples
for i in range(cs_X_test.shape[0]):
    x = cs_X_test[i, :].reshape((1, -1))
    x_dmatrix = xgb.DMatrix(x)
    start = time.time()
    cs_booster.predict(x_dmatrix)
    total_time.append(time.time() - start)
    
In [ ]:
    
avg_time = np.mean(total_time)
    
In [ ]:
    
avg_time * 1000
    
In [ ]:
    
std_time = np.std(total_time)
    
In [ ]:
    
std_time * 1000
    
In [50]:
    
xgboost_testing_start = time.time()
cs_y_predicted = cs_booster.predict(cs_test_xgbmatrix)
xgboost_testing_time = (time.time() - xgboost_testing_start)
xgboost_testing_time
    
    Out[50]:
In [51]:
    
cs_y_train_predicted = cs_booster.predict(cs_train_xgbmatrix)
cs_train_accuracy = metrics.accuracy_score(cs_y_train, cs_y_train_predicted)
cs_train_accuracy
    
    Out[51]:
In [52]:
    
cs_accuracy = metrics.accuracy_score(cs_y_test, cs_y_predicted)
cs_accuracy
    
    Out[52]:
In [53]:
    
cs_confusion_matrix = get_normalized_confusion_matrix(cs_y_test, cs_y_predicted)
    
In [54]:
    
cs_confusion_matrix_subjects = list()
for subject_id in unique_subjects:
    subject_features = features_df[features_df["subject"] == subject_id]
    subject_features = subject_features.drop(["subject"], axis=1)
    subject_labels = labels_df[labels_df["subject"] == subject_id]
    subject_labels = subject_labels[["activity"]]
    subject_X = subject_features.values
    subject_y = subject_labels.values
    subject_xgbmatrix = xgb.DMatrix(subject_X, subject_y)
    subject_y_predicted = cs_booster.predict(subject_xgbmatrix)
    
    subject_accuracy = metrics.accuracy_score(subject_y, subject_y_predicted)
    subject_confusion_matrix = get_normalized_confusion_matrix(subject_y, subject_y_predicted)
    cs_confusion_matrix_subjects.append((subject_id, subject_accuracy, subject_confusion_matrix))
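    
For a quick look at how the cross-subject model generalises to each individual, the tuples collected above can be printed directly; an illustrative sketch, not part of the original output.
    
In [ ]:
    
# Per-subject accuracy of the cross-subject booster (in percent).
for subject_id, subject_accuracy, _ in cs_confusion_matrix_subjects:
    print("Subject", subject_id, "\tAccuracy:", round(subject_accuracy * 100, 2))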
    
In [55]:
    
for activity_idx, activity in enumerate(ACTIVITIES):
    activity_accuracy = cs_confusion_matrix[activity_idx, activity_idx]
    activity_error = 100 - activity_accuracy
    print(activity, "\tAccuracy:", activity_accuracy, "\tError:", activity_error)
    
    
In [56]:
    
cs_results_dump = {
    "training_time": xgboost_training_time,
    "testing_time": xgboost_testing_time,
    "eval_results": cs_eval_results,
    "eval_earlystoppping_best_iteration": cs_validation.best_iteration+1,
    "eval_earlystoppping_best_score": cs_validation.best_score,
    "classifier": cs_booster,
    "training_accuracy": cs_train_accuracy,
    "final_accuracy": cs_accuracy,
    "final_confusion_matrix": cs_confusion_matrix,
    "subject_confusion_matrix": cs_confusion_matrix_subjects
}
with open("results/cs.pickle", "wb") as f:
    pickle.dump(cs_results_dump, f)
    
In [57]:
    
noinfrared_X_train = noinfrared_data["X_train"]
noinfrared_y_train = noinfrared_data["y_train"]
noinfrared_X_test = noinfrared_data["X_test"]
noinfrared_y_test = noinfrared_data["y_test"]
    
In [58]:
    
noinfrared_X_train.shape
    
    Out[58]:
In [59]:
    
noinfrared_X_test.shape
    
    Out[59]:
In [60]:
    
noinfrared_train_xgbmatrix = xgb.DMatrix(noinfrared_X_train, noinfrared_y_train)
noinfrared_test_xgbmatrix = xgb.DMatrix(noinfrared_X_test, noinfrared_y_test)
noinfrared_watchlist = [(noinfrared_train_xgbmatrix, "train"), (noinfrared_test_xgbmatrix, "eval")]
    
In [61]:
    
noinfrared_eval_results = {}
noinfrared_validation = xgb.train(params=XGB_PARAM_FINAL, dtrain=noinfrared_train_xgbmatrix, evals=noinfrared_watchlist,
                                  evals_result=noinfrared_eval_results, num_boost_round=XGB_NUM_ROUNDS, 
                                  early_stopping_rounds=XGB_EARLYSTOPPING_ROUNDS, verbose_eval=100)
    
    
In [62]:
    
noinfrared_booster = xgb.train(params=XGB_PARAM_FINAL, dtrain=noinfrared_train_xgbmatrix,
                               num_boost_round=noinfrared_validation.best_iteration+1)
    
In [63]:
    
noinfrared_y_predicted = noinfrared_booster.predict(noinfrared_test_xgbmatrix)
    
In [64]:
    
noinfrared_accuracy = metrics.accuracy_score(noinfrared_y_test, noinfrared_y_predicted)
noinfrared_accuracy
    
    Out[64]:
In [65]:
    
noinfrared_confusion_matrix = get_normalized_confusion_matrix(noinfrared_y_test, noinfrared_y_predicted)
    
In [66]:
    
noinfrared_results_dump = {
    "eval_results": noinfrared_eval_results,
    "eval_earlystoppping_best_iteration": noinfrared_validation.best_iteration+1,
    "eval_earlystoppping_best_score": noinfrared_validation.best_score,
    "classifier": noinfrared_booster,
    "final_accuracy": noinfrared_accuracy,
    "final_confusion_matrix": noinfrared_confusion_matrix,
}
with open("results/cs_noinfrared.pickle", "wb") as f:
    pickle.dump(noinfrared_results_dump, f)
    
In [67]:
    
# All leave-half-out subject splits: every way of choosing half the subjects for training.
cs_combinations = list(itertools.combinations(unique_subjects, int(len(unique_subjects) / 2)))
len(cs_combinations)
    
    Out[67]:
In [68]:
    
cs_combinations_results_csv = "results/cs_combinations.csv"
    
In [69]:
    
# Start a fresh results file containing only the header row.
data_columns = pd.DataFrame(columns=["combination", "activity", "a1", "a2", "a3", "a4", "a5", "a6"])
data_columns.to_csv(cs_combinations_results_csv, header=True, index=False)
    
In [70]:
    
for cs_combination_idx, cs_combination in enumerate(cs_combinations):
    print(cs_combination_idx, "... ", end="")
    # Get data
    combination_X_train, combination_y_train, combination_X_test, combination_y_test = crosssubject_test_split(
        features_df, labels_df, cs_combination)
    combination_train_xgbmatrix = xgb.DMatrix(combination_X_train, combination_y_train)
    combination_test_xgbmatrix = xgb.DMatrix(combination_X_test, combination_y_test)
    # Train for the number of rounds selected earlier on the main cross-subject split
    combination_booster = xgb.train(params=XGB_PARAM_FINAL, dtrain=combination_train_xgbmatrix,
                                    num_boost_round=cs_validation.best_iteration+1)
    combination_y_predicted = combination_booster.predict(combination_test_xgbmatrix)
    
    # Raw-count confusion matrix for this split
    combination_results = metrics.confusion_matrix(combination_y_test, combination_y_predicted)
    combination_results_df = pd.DataFrame(columns=["combination", "activity", "a1", "a2", "a3", "a4", "a5", "a6"])
    for activity_id, activity in enumerate(ACTIVITIES):
        combination_results_df.loc[activity_id] = [
            cs_combination_idx, activity, 
            combination_results[activity_id,0], combination_results[activity_id,1], combination_results[activity_id,2], 
            combination_results[activity_id,3], combination_results[activity_id,4], combination_results[activity_id,5]
        ]
    # Append results
    with open(cs_combinations_results_csv, "a") as f:
        combination_results_df.to_csv(f, header=False, index=False)
    
    
In [71]:
    
combinations_results_df = pd.read_csv(cs_combinations_results_csv)
    
In [72]:
    
combinations_confusion_matrix = np.zeros((num_activities, num_activities))
for activity_idx, activity in enumerate(ACTIVITIES):
    combinations_activity_results = combinations_results_df[combinations_results_df["activity"] == activity]
    for accuracy_idx, accuracy_column in enumerate(["a1", "a2", "a3", "a4", "a5", "a6"]):
        combinations_confusion_matrix[activity_idx, accuracy_idx] = combinations_activity_results[accuracy_column].sum()
combinations_confusion_matrix_normalized = combinations_confusion_matrix.astype("float") / combinations_confusion_matrix.sum(axis=1)[:, np.newaxis]
combinations_confusion_matrix_normalized *= 100
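    
The overall accuracy across all combinations can also be read off the raw-count matrix in one line (diagonal over total); a small cross-check sketch that should match the element-wise computation in the following cells.
    
In [ ]:
    
# Overall accuracy across all combinations: correct (diagonal) counts over all counts.
np.trace(combinations_confusion_matrix) / combinations_confusion_matrix.sum()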
    
In [73]:
    
all_samples = np.sum(combinations_confusion_matrix)
    
In [74]:
    
accurate_samples = 0
for activity_id in range(len(ACTIVITIES)):
    accurate_samples += combinations_confusion_matrix[activity_id, activity_id]
    
In [75]:
    
combinations_accuracy = accurate_samples / all_samples
combinations_accuracy
    
    Out[75]:
In [76]:
    
combinations_results_dump = {
    "accuracy": combinations_accuracy,
    "confusion_matrix": combinations_confusion_matrix_normalized,
}
    
In [77]:
    
with open("results/cs_combinations.pickle", "wb") as f:
    pickle.dump(combinations_results_dump, f)
    
In [78]:
    
# X.shape
    
In [79]:
    
# y.shape
    
In [80]:
    
# demo_train_xgbmatrix = xgb.DMatrix(X, y)
# demo_test_xgbmatrix = xgb.DMatrix(X, y)
# demo_watchlist = [(demo_train_xgbmatrix, "train"), (demo_test_xgbmatrix, "eval")]
    
In [81]:
    
# demo_results = {}
# demo_booster = xgb.train(XGB_PARAM_DEMO, demo_train_xgbmatrix, XGB_NUM_ROUNDS_DEMO, demo_watchlist, evals_result=demo_results, early_stopping_rounds=20)
    
In [82]:
    
# demo_booster.save_model("demo-xgboost.model")
    
In [83]:
    
# bst2 = xgb.Booster(model_file="demo-xgboost.model")
    
In [84]:
    
# test_dmatrix = xgb.DMatrix(X)
# y_predicted = bst2.predict(test_dmatrix)
# accuracy = metrics.accuracy_score(y, y_predicted)
    
In [85]:
    
# accuracy